# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/27 16:16
# @File: qu_1.py

from pyspark import SparkConf, SparkContext
import re

if __name__ == '__main__':
    # Count and print "special" tokens from a text file: whitespace-separated
    # tokens whose first character is not an ASCII letter or digit.
    conf = SparkConf().setAppName("a").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    rdd = sc.textFile("file:///tmp/pycharm_project_161/data/accumulator_broadcast_data.txt")
    # Split each line on single spaces and drop the empty tokens produced by
    # consecutive spaces.
    rdd1 = rdd.flatMap(lambda x: x.split(" ")).filter(lambda x: x != "")
    # Keep a token when its FIRST character does not match [a-z0-9A-Z]:
    # findall on the single char x[0] yields exactly 1 match iff it is ASCII
    # alphanumeric, so "!= 1" selects tokens starting with a special character.
    rdd2 = rdd1.filter(lambda x: len(re.findall("[a-z0-9A-Z]", x[0], re.S)) != 1)
    # Collect exactly once — the original called collect() twice, which forced
    # Spark to recompute the whole lineage a second time.
    specials = rdd2.collect()
    print("特殊字符数量：" + str(len(specials)) + "如下所示：")
    print(specials)
    # Release driver/executor resources explicitly.
    sc.stop()
